* Session setup: start from an empty session, then adjust Stata limits
* before any data are loaded.
clear all 
* Disable the -more- pause so the do-file runs unattended
* (presumably the legacy numeric form of "set more off" - TODO confirm).
set more 1 
* Raise the maximum number of variables allowed in memory.
set maxvar 32000 
* Raise the maximum matrix size.
set matsize 11000 

**************************************************************** 
*Wave I cleaning
**************************************************************** 
local do_here=1

*In-Home Interview (which includes the section of the code about friendship nominations at home) 
if `do_here'==1{ 

* ----------------------------------------------------------------
* Load the Wave I In-Home interview file, give the key variables
* readable names, blank out out-of-range codes and compute the
* respondent's age at the interview date.
* ----------------------------------------------------------------
cd "$data\In Home Interview Files\allwave1"
import sasxport "allwave1.xpt", clear
rename _all , upper

rename SCID idschool_inhome
rename SSCID idschool_sister
rename H1GI20 class_inhome
rename COMMID idcommunity_inhome
rename PC6B father_home
rename BIO_SEX bio_sex
rename PA36 important_boy_learn

* Values above the listed thresholds are set to missing.
replace important_boy_learn = . if important_boy_learn>5

replace father_home = . if father_home>1
replace bio_sex     = . if bio_sex>2

* The school identifiers are strings here; the code "999" is blanked out.
replace idschool_inhome ="" if idschool_inhome=="999"
replace idschool_sister ="" if idschool_sister=="999"

replace class_inhome = . if class_inhome>=96

*Children Birth Month
recode H1GI1M (96=.), gen (w1bmonth)
*Children Birth Year
recode H1GI1Y (96=.), gen (w1byear)
* Birth date: the day is fixed at the 15th and the two-digit year is
* offset by 1900.
gen w1bdate = mdy(w1bmonth, 15,1900+w1byear)
format w1bdate %d
* Date built from IMONTH / IDAY / IYEAR (two-digit year offset by 1900).
gen w1idate=mdy(IMONTH, IDAY,1900+IYEAR)
format w1idate %d

* Age in completed years between the two dates.
gen age=int((w1idate-w1bdate)/365.25)

* Fall back to S1 when the date-based age is missing.
* S1 values of 99 and above are treated as missing where this file cleans
* the same variable as age_school, so they must not be copied into age.
* (A missing S1 also fails the S1<99 test, which leaves age missing.)
replace age = S1 if age==. & S1<99


* Mother's age: PA2 with the code 996 blanked out, kept only when PC1==1.
replace PA2 = . if PA2 == 996
gen age_mom = PA2 if PC1 == 1

drop H1WP17K H1WP18K

* Temporary race code.
*   1 = H1GI4==1 (Hispanic, all races)
*   2..6 = H1GI4 not equal to 1 and the matching H1GI6* flag equals 1
* Later assignments overwrite earlier ones, so when several H1GI6* flags
* equal 1 the last flag in the list below wins.
gen race = .
replace race = 1 if H1GI4 == 1
local code = 2
foreach flag in H1GI6B H1GI6D H1GI6C H1GI6E H1GI6A {
    replace race = `code' if H1GI4 ~= 1 & `flag' == 1
    local ++code
}

* Mutually exclusive 0/1 indicators; all three stay missing when race is
* missing. "other" collects race codes 3 to 6.
gen hispanic = (race == 1)         if race < .
gen black    = (race == 2)         if race < .
gen other    = inrange(race, 3, 6) if race < .

drop race

*The order reflect ID , age , PPVT , 


* Grades (H1ED11-H1ED14 (last grades) at In-Home Interview 
*S10A-S10D grades In-School interview) , 
	
	
* Investment (H1WP17)

* (PC1 = person who responded to the questionnaire) (PC2 = if mother lives in the household)
* (PA12 Mother levels of education H1NM4 reported years of education for mother not at home) 

* Mother's education (PA12).
* Codes 10 and above are set to missing: refusals, and also respondents
* who report never having been to school (only 19 observations).
replace PA12 = . if PA12 >= 10

* When PA12 is missing, use the child's own report of mother's education
* from the in-home interview (H1NM4), provided that value is below 10.
replace PA12 = H1NM4 if missing(PA12) & H1NM4 < 10

* Family income (PA55), in 1994 dollars.
rename PA55 family_income

* Codebook: values of 9996 and above are set to missing; the remaining
* values are multiplied by 1000.
replace family_income = cond(family_income >= 9996, ., family_income * 1000)

*Adjust in 2012 dollars (from Bureau CPI time series)
*replace family_income= family_income*229.6/148.3

summarize family_income, detail


* Variables carried forward from the in-home file. The same list drives
* both the column order and the keep.
local wave1_vars AID SQID age age_mom family_income AH_RAW H1ED11 H1ED12 H1ED13 H1ED14 H1WP17* H1WP18* ///
    PC1 PC2 PA12 hispanic black other idschool_inhome class_inhome idcommunity_inhome ///
    PC10 PC11 PC12 PC13 H1WP1 H1WP2 H1WP3 H1WP4 H1WP5 H1WP6 H1WP7 bio_sex father_home important_boy_learn

order `wave1_vars'
keep  `wave1_vars'

* For these items any value above 1 is set to missing.
foreach v in H1WP1 H1WP2 H1WP3 H1WP4 H1WP5 H1WP6 H1WP7 PC10 PC11 PC12 {
    replace `v' = . if `v' > 1
}

* PC13: values above 3 are set to missing.
replace PC13 = . if PC13 > 3
****************************
*Cognitive skills measures
****************************

* AH_RAW becomes the first cognitive measure.
summarize AH_RAW
rename AH_RAW cogn1

* In-home grades (grades are A=1, B=2, C=3, D=4): values of 5 and above
* are set to missing.
local list_grades H1ED11 H1ED12 H1ED13 H1ED14
foreach g of local list_grades {
    replace `g' = . if `g' >= 5
}
summarize `list_grades'

* The four grades become cogn2..cogn5, in list order.
rename (`list_grades') (cogn2 cogn3 cogn4 cogn5)


*******************************************
*Mother and Father Time Investments
*******************************************

* Items A..J of H1WP17 (mother) and H1WP18 (father) are cleaned and renamed
* in one pass: values above 1 become missing, then item number k is named
* inv<k> for the mother and inv<k>_father for the father.
local slot = 0
foreach item in A B C D E F G H I J {
    local ++slot

    replace H1WP17`item' = . if H1WP17`item' > 1
    rename  H1WP17`item' inv`slot'

    replace H1WP18`item' = . if H1WP18`item' > 1
    rename  H1WP18`item' inv`slot'_father
}

*************************************************************
* Re order the measures with a positive ordering (higher the more skills)
*************************************************************

* Each grade is replaced by (observed maximum - grade + 1), so the best
* grade receives the highest value. The reversed copy is generated as a new
* variable and then takes over the original name.
foreach g in cogn2 cogn3 cogn4 cogn5 {
    quietly summarize `g', meanonly
    gen `g'_temp = -1*(`g' - r(max)) + 1
    drop `g'
    rename `g'_temp `g'
}

order AID SQID age age_mom family_income PC1 PC2 PA12 hispanic black other inv* cogn* ///
    idschool_inhome class_inhome idcommunity_inhome PC10 PC11 PC12 PC13 ///
    H1WP1 H1WP2 H1WP3 H1WP4 H1WP5 H1WP6 H1WP7 bio_sex father_home important_boy_learn

* Cleaned in-home extract used by the later merges.
save $scratch\keyv_wIp.dta, replace

clear all

} 

*Open In-School interview Information 
local do_here=1 
if `do_here' ==1 { 

* Load the In-School questionnaire.
cd "$data\School Files\In-school Questionnaire\inschool"
import sasxport "Inschool.xpt" , clear
rename _all , upper

* Variables of interest in the In-School interview (author's list):
*  1) S10A S10B S10C S10D: grades (English, Math, History, Science)
*  2) S46A S46B S46C S46D: measures of non-cognitive skills (extraversion)
*  3) S48: effort for homework
*  4) S59A to S59G: measures of risky behaviours
*  5) S62B S62E S62H S62K S62M S62N S62O S62P: other measures of
*     non-cognitive skills, see Lundberg (2014)
*     http://www.iza.org/conference_files/genderconvergence_2014/lundberg_s1315.pdf page 29
*  6) S64: how many times involved in a fight

rename (S10A S10B S10C S10D) (cogn2_school cogn3_school cogn4_school cogn5_school)
rename S2 gender

* gender: values above 2 are set to missing.
replace gender = . if gender > 2

* Grades above 4 are set to missing, then each grade is reversed as
* (observed maximum - grade + 1) so that higher means better.
foreach g in cogn2_school cogn3_school cogn4_school cogn5_school {
    replace `g' = . if `g' > 4
    quietly summarize `g', meanonly
    gen `g'_temp = -1*(`g' - r(max)) + 1
    drop `g'
    rename `g'_temp `g'
}


* Temporary race code from the in-school answers.
*   1 = S4==1 (Hispanic, all races)
*   2..6 = S4 not equal to 1 and the matching S6* flag equals 1
* Later assignments overwrite earlier ones, so the last matching flag wins.
gen race = .
replace race = 1 if S4 == 1
local code = 2
foreach flag in S6B S6D S6C S6E S6A {
    replace race = `code' if S4 ~= 1 & `flag' == 1
    local ++code
}

* Mother's education from S12, mapped to the values 8/10/12/13/16/17.
* Any other S12 value leaves it missing.
gen educ_mother_s = .
replace educ_mother_s = 8  if S12 == 1
replace educ_mother_s = 10 if S12 == 2
replace educ_mother_s = 12 if inlist(S12, 3, 4)
replace educ_mother_s = 13 if inlist(S12, 5, 6)
replace educ_mother_s = 16 if S12 == 7
replace educ_mother_s = 17 if S12 == 8

* Mutually exclusive 0/1 indicators, missing when race is missing.
gen hispanic_s = (race == 1)         if race < .
gen black_s    = (race == 2)         if race < .
gen other_s    = inrange(race, 3, 6) if race < .

keep AID SQID hispanic_s black_s other_s educ_mother_s cogn* SSCHLCDE S3 S1 gender

rename (SSCHLCDE S3 S1) (idschool class age_school)

* Out-of-range codes become missing.
replace class      = . if class > 12
replace age_school = . if age_school >= 99

save $scratch\temp_inschool.dta, replace

} 


*Here I merge the In-School Nominations data with previous measures from In-School Interview 

local do_here=1 

if `do_here'==1 { 

* Load the in-school friendship nominations and attach the in-school
* interview measures saved earlier.
cd "$data\Friend Files\In-school Nominations\sfriend"
import sasxport "sfriend.xpt" , clear
rename _all , upper

* NOTE(review): this is an m:m merge on SQID. If SQID is meant to identify
* one questionnaire in each file, a 1:1 merge would be the safer form.
* Confirm the uniqueness of SQID in both files before changing it.
merge m:m SQID using $scratch\temp_inschool.dta
drop _merge

order AID SQID

* Male (MF) and female (FF) friend nominations 1..5: the four reserved
* identifier codes below are set to missing.
foreach sex in MF FF {
    forvalues k = 1/5 {
        replace `sex'`k'AID = . if inlist(`sex'`k'AID, 77777777, 88888888, 99999999, 99959995)
    }
}

save $scratch\temp_merged.dta, replace

*****************************
*Merging In-Home Survey
*****************************

*drop if AID==""
* Start from the cleaned in-home extract; every row in it is flagged
* in_home = 1.
use $scratch\keyv_wIp.dta, clear
gen in_home = 1

merge 1:m AID using $scratch\temp_merged.dta

* Rows that come only from the in-school side have no flag yet; set it to 0.
replace in_home = 0 if missing(in_home)

* Author's note: the Add Health website says that only 15356 kids with an
* in-home interview were also interviewed in class (this is why
* _merge==2 rows exist).
tabulate _merge
tabulate in_home

drop _merge

destring idschool idcommunity_inhome, replace

order age class idschool idcommunity

*drop idschool_inhome class_inhome 


*************************************
*generate mother educational info
*************************************

*Years of education for bio/adopt/step/foster mother
* PA12 is only used when PC1 is 1, 2, 3 or 4.
local mother_resp "inlist(PC1, 1, 2, 3, 4)"

gen educ_mother = .
replace educ_mother = 8  if PA12 == 1          & `mother_resp'
replace educ_mother = 10 if inlist(PA12, 2, 3) & `mother_resp'
replace educ_mother = 12 if inlist(PA12, 4, 5) & `mother_resp'
replace educ_mother = 13 if inlist(PA12, 6, 7) & `mother_resp'
replace educ_mother = 16 if PA12 == 8          & `mother_resp'
replace educ_mother = 17 if PA12 == 9          & `mother_resp'

******************************************************************************
* Combine race info from In-Home and In-School interview
******************************************************************************

* Rows without an in-home interview take the in-school race indicators.
foreach d in hispanic black other {
    replace `d' = `d'_s if in_home == 0
}

* Fill gaps with the in-school report of mother's education, and fill a
* missing gender from bio_sex.
replace educ_mother = educ_mother_s if educ_mother == .
replace gender = bio_sex if gender == . & bio_sex != .

drop hispanic_s black_s other_s

save $scratch\temp_nomination.dta, replace


use $scratch\temp_nomination.dta, clear

* Grades: use the in-school grade when there is no in-home interview or
* when the in-home grade is missing, then drop the in-school copy.
forvalues i = 2/5 {
    replace cogn`i' = cogn`i'_school if in_home == 0 | cogn`i' == .
    drop cogn`i'_school
}

destring , replace

* Final Wave I variable set. The same list is used for the keep and for
* the column order.
local wave1_final AID idschool idschool_inhome age age_school age_mom class class_inhome ///
    family_income hispanic black other inv* cogn* educ_mother ///
    MF* FF* in_home idcommunity PC10 PC11 PC12 PC13 ///
    H1WP1 H1WP2 H1WP3 H1WP4 H1WP5 H1WP6 H1WP7 bio_sex father_home gender important_boy_learn

keep  `wave1_final'
order `wave1_final'

save $scratch\waveI_addhealth.dta, replace

} 

**************************************************************** 
*Wave II cleaning 
**************************************************************** 

* Switch for the Wave II section: set to 1 to run it.
local do_waveII=1

* The closing macro quote has to come right after the macro name, so that
* the value of do_waveII is what gets compared with 1. This matches the
* other section switches in this file.
if `do_waveII'==1 {
*Wave II public to get activities and age

cd "$data\In Home Interview Files\wave2"
import sasxport "wave2.xpt" , clear
rename _all , upper

rename H2GI9 class_t2

* Wave II grades (grades are A=1, B=2, C=3, D=4): values of 5 and above
* are set to missing.
local list_grades H2ED7 H2ED8 H2ED9 H2ED10
foreach g of local list_grades {
    replace `g' = . if `g' >= 5
}
summarize `list_grades'

****************************************
* Rename Variables
****************************************

* The four grades become cogn2_t2..cogn5_t2, in list order.
rename (`list_grades') (cogn2_t2 cogn3_t2 cogn4_t2 cogn5_t2)


*************************************************************
* Re order the measures with a positive ordering (higher the more skills)
*************************************************************

*Cognitive measures

* The Wave II grades are named cogn2_t2..cogn5_t2 at this point, so the
* loop lists those names in full instead of relying on variable
* abbreviation (cogn2 -> cogn2_t2), which fails when varabbrev is off.
* Each grade becomes (observed maximum - grade + 1): the reversed copy is
* built in a new variable that then takes over the original name.
foreach var of varlist cogn2_t2 cogn3_t2 cogn4_t2 cogn5_t2 {
egen temp1 = max(`var')
gen temp2 = -1*(`var' - temp1)
gen `var'_temp = temp2 + 1
drop `var'
rename `var'_temp `var'
* remove the two helper variables before the next pass
drop temp*
}


*******************************************
*Mother and Father Time Investments (Wave II)
*******************************************

* Items A..J of H2WP17 (mother) and H2WP18 (father): values above 1 become
* missing, then item number k is named inv<k>_t2 for the mother and
* inv<k>_father_t2 for the father.
local slot = 0
foreach item in A B C D E F G H I J {
    local ++slot

    replace H2WP17`item' = . if H2WP17`item' > 1
    rename  H2WP17`item' inv`slot'_t2

    replace H2WP18`item' = . if H2WP18`item' > 1
    rename  H2WP18`item' inv`slot'_father_t2
}

* H2WP1..H2WP7: values above 1 are set to missing.
forvalues q = 1/7 {
    replace H2WP`q' = . if H2WP`q' > 1
}

keep AID class_t2 cogn* SCID2 inv* H2GI10 H2WP1 H2WP2 H2WP3 H2WP4 H2WP5 H2WP6 H2WP7

order AID class_t2 SCID2 cogn* inv* H2WP1 H2WP2 H2WP3 H2WP4 H2WP5 H2WP6 H2WP7

rename SCID2 idschool_t2

destring , replace

save $scratch\temp_waveII.dta, replace

* Wave II in-home friendship nominations.
cd "$data\Friend Files\Wave II In-Home Nominations\hfriend2"
import sasxport "hfriend2.xpt" , clear
rename _all , upper
destring _all, replace

* Male (MF) and female (FF) friend nominations 1..5: the four reserved
* identifier codes below are set to missing.
foreach sex in MF FF {
    forvalues k = 1/5 {
        replace `sex'_AID`k' = . if inlist(`sex'_AID`k', 77777777, 88888888, 99999999, 55555555)
    }
}

sort AID

* Attach the Wave II interview measures saved earlier.
merge 1:1 AID using $scratch\temp_waveII.dta
drop _merge

save $scratch\waveII_addhealth.dta, replace

} 



**************************************************************** 
*Collect Contextual Information; 
**************************************************************** 

local do_here = 1 

if `do_here' == 1 { 


* Wave I contextual file: keep the respondent id and eight
* tract-level (tst*) / block-level (bst*) variables.
cd "$data\Contextual Files\Contextual - Wave I\Context1"
import sasxport "Context1.xpt", clear
keep aid bst90598 tst90598 tst90544 bst90544 tst90550 bst90550 tst90591 bst90591

destring aid, replace

* Collapse the extended missing codes .g and .e into plain missing.
foreach x in bst90598 tst90598 tst90544 bst90544 tst90550 bst90550 tst90591 bst90591 {
    replace `x' = . if `x' == .g | `x' == .e
}

rename aid AID
rename (tst90544 tst90550 tst90591 bst90544 bst90550 bst90591) ///
       (median_hhincome_tract sd_hhincome_tract mean_capitaincome_tract ///
        median_hhincome_block sd_hhincome_block mean_capitaincome_block)

* Rescale the six income measures by dividing by 10000.
foreach x in median_hhincome_tract median_hhincome_block sd_hhincome_tract sd_hhincome_block mean_capitaincome_tract mean_capitaincome_block {
    replace `x' = `x' / 10000
}

save $scratch\temp_waveI_context.dta, replace

}

**************************************************************** 
*Merging All Together 
**************************************************************** 

local do_here = 1 

if `do_here' == 1 { 

* Combine the Wave II file (master) with the Wave I file (using).
use $scratch\waveII_addhealth.dta, clear

merge 1:m AID using $scratch\waveI_addhealth.dta
drop _merge

order AID idschool class MF* FF* age age_mom cogn* family_income hispanic black other inv* educ_mother ///
    in_home idcommunity PC10 PC11 PC12 PC13 ///
    H1WP1 H1WP2 H1WP3 H1WP4 H1WP5 H1WP6 H1WP7 H2WP1 H2WP2 H2WP3 H2WP4 H2WP5 H2WP6 H2WP7 ///
    bio_sex father_home gender important_boy_learn

* Ratio of the two CPI values used to inflate 1994 dollars.
gen cpi_2016_1994 = 240.007/148.3

* Single race code: 1 = black, 2 = hispanic, 3 = other. If more than one
* indicator equals 1, other takes precedence, then hispanic, then black.
gen race = cond(other == 1, 3, cond(hispanic == 1, 2, cond(black == 1, 1, .)))

gen real_family_income = family_income * cpi_2016_1994

summarize real_family_income , detail

*Using age at school for kids who I only have in-school interview and their grade at that moment
* Age: the in-school age is preferred. When it is missing, the in-home age
* minus one is used instead.
gen age_new = age_school
replace age_new = age - 1  if age_new==. & age!=.

drop age
rename age_new   age

drop age_school

*replace age = age_school if in_home==0
*drop age_school

* School id: in-school value first, then the in-home value, then Wave II.
replace idschool = idschool_inhome if idschool==. &  idschool_inhome!=.
replace idschool = idschool_t2 if idschool==. &  idschool_t2!=.

drop idschool_inhome idschool_t2

* Class (grade level): in-school value first, then the in-home value, then
* the Wave II value minus one (only for Wave II values below 14).
replace class = class_inhome if class==.
replace class = class_t2-1 if class==. & class_inhome==. & class_t2<14
*graduated
replace class = 12 if  H2GI10==6 & class==.

* Remaining gaps are imputed from age: 12 -> 7, 13 -> 8, ..., 17 -> 12.
forvalues a = 12/17 {
    replace class = `a' - 5 if class==. & age == `a'
}
* Ages 18 and above are assigned class 12. Stata treats missing as larger
* than any number, so age<. is required here. Without it every row with a
* missing age and a missing class would be assigned class 12.
replace class = 12 if class==. & age>=18 & age<.


save $scratch\temp_data_WI_WII_nominations.dta, replace

* School information file.
cd "$data\School Files\School Information Data\Schinfo"
import sasxport "Schinfo.xpt" , clear
rename _all , upper

*keep SCID SAT_SCHL type_school_grades

rename (GRADES SCID SAT_SCHL) (type_school_grades idschool saturated)

destring idschool, replace
sort idschool

save $scratch\temp_school_info.dta, replace

* Attach the respondent-level data. Schools that match no respondent
* (_merge==1) are discarded.
merge 1:m idschool using $scratch\temp_data_WI_WII_nominations.dta
keep if _merge != 1
drop _merge

* Add the Wave I contextual measures. The _merge variable from this last
* merge is left in the saved file.
merge m:1 AID using $scratch\temp_waveI_context.dta

save $scratch\data_WI_WII_nominations.dta, replace
} 

